# coding=utf8
import time
from selenium.webdriver.common.keys import Keys

from libs.sele import driver_new_session
from models.fb import model_fb_user
from bs4 import BeautifulSoup


def load_more(change2async=False):
    """Keep clicking the "more" pager link until it can no longer be clicked.

    Works on the module-level ``driver`` rather than taking the browser as an
    argument, so that global must exist before this is called.

    Args:
        change2async: When true, set ``rel="async"`` on the first link inside
            ``#more_search_results`` before every click.

    Returns:
        Always ``False``, returned once the pager link cannot be found or
        clicked (or, with ``change2async``, once the pager script fails).

    Raises:
        NameError: If no module-level ``driver`` has been defined.
    """
    # Resolve the global outside the ``try`` so a missing ``driver`` surfaces
    # as a NameError instead of being reported as "no more page".
    browser = driver
    while True:
        try:
            if change2async:
                # If #more_search_results is absent the script throws; treat
                # that the same as a missing pager link instead of crashing.
                browser.execute_script(
                    "document.getElementById('more_search_results').getElementsByTagName('a')[0]"
                    ".setAttribute('rel','async')"
                )
            pager = browser.find_element_by_xpath(".//a[@class='pam uiBoxLightblue uiMorePagerPrimary']")
            pager.click()
        except Exception:
            # ``Exception`` (not a bare ``except``) so Ctrl-C still interrupts.
            print('no more page')
            return False
        print('get more')
        # Fixed pause before looking for the pager link again.
        time.sleep(2)


def facebook_login(driver, username, password):
    """Log in to Facebook by filling in the login form on the home page.

    Args:
        driver: Selenium WebDriver used to load the page and type into it.
        username: Text typed into the element with id ``email``.
        password: Text typed into the element with id ``pass``; the form is
            submitted by sending RETURN to that same field.

    Note:
        "Login Success" is printed unconditionally after a fixed 2 second
        wait; the outcome of the login attempt is not verified.
    """
    print("Login to Facebook....")
    # Request the login form over HTTPS so the page that receives the
    # credentials is never fetched in clear text.
    url = "https://www.facebook.com"
    driver.get(url)
    email_field = driver.find_element_by_id("email")
    email_field.send_keys(username)
    password_field = driver.find_element_by_id("pass")
    password_field.send_keys(password)
    password_field.send_keys(Keys.RETURN)
    # Fixed pause after submitting the form.
    time.sleep(2)
    print("Login Success\n")


def get_group_number(driver, group_id='1442306096065026'):
    """Collect one record per member link on a group's members page.

    Args:
        driver: Selenium WebDriver; it is navigated to the members page.
        group_id: Facebook group id used to build the members URL. Defaults
            to the group this script was originally written for.

    Returns:
        A list of dicts with the keys ``Name`` (the link's inner HTML),
        ``HomePage`` (the profile URL without its query string, plus
        ``/about``), ``MessageId`` (the first path segment of that URL) and
        ``GroupId`` (``group_id`` as an int).
    """
    driver.get('https://www.facebook.com/groups/{}/members/'.format(group_id))

    # Expand the member list; note this helper uses the module-level driver.
    load_more()

    # Group admins are not filtered out here; exclude them by hand for now.
    url_list = []
    for td in driver.find_elements_by_css_selector('.uiGrid td'):
        try:
            a = td.find_element_by_css_selector('.fsl.fwb.fcb').find_element_by_tag_name('a')
            href = a.get_attribute('href')
        except Exception:
            # Cell without the expected name link: skip it.
            continue
        if not href:
            # Anchor without an href: nothing to crawl.
            continue
        # Drop the query string. partition() leaves the URL intact when there
        # is no '?', whereas slicing at str.find()'s -1 cut the last character.
        href_about = href.partition('?')[0] + '/about'
        # 'https://host/<segment>/about'.split('/') -> index 3 is <segment>.
        message_id = href_about.split('/')[3]
        url_list.append({
            'Name': a.get_attribute('innerHTML'),
            'HomePage': href_about,
            'MessageId': message_id,
            'GroupId': int(group_id)
        })
    return url_list


def get_profile(driver, url_list):
    """Yield member dicts enriched with data scraped from their About pages.

    Members are skipped (nothing is yielded for them) when their ``HomePage``
    contains ``profile.php``, when ``model_fb_user`` already holds a document
    with the same ``GroupId`` and ``HomePage``, or when the overview page
    lists no section worth opening.

    Args:
        driver: Selenium WebDriver used to open each profile.
        url_list: Iterable of member dicts with at least the ``HomePage`` and
            ``GroupId`` keys.

    Yields:
        The same dict objects from ``url_list``, updated in place with the
        scraped fields. After the consumer asks for the next item the
        generator first sleeps 5 seconds.
    """
    for user in url_list:
        print(user['HomePage'])
        if 'profile.php' in user['HomePage']:
            continue
        if model_fb_user.find_one({'GroupId': user['GroupId'], 'HomePage': user['HomePage']}) is not None:
            print("已收录")
            continue
        driver.get(user['HomePage'])
        user_info = {}
        # Use the overview page to decide which sections to open in detail.
        info_list_idx = []
        idx = 0
        last_span = ''
        for info in driver.find_elements_by_css_selector('._4bl7 .uiList li'):
            span = BeautifulSoup(info.get_attribute('innerHTML'), 'lxml').get_text(strip=True)
            # Empty rows and immediate repeats of the previous row's text do
            # not count as a new overview position.
            if not span or last_span == span:
                continue
            idx += 1
            last_span = span
            # Only positions 1-4 have a scraper in profile_map; stop scanning
            # as soon as that range is exceeded.
            if idx > 4:
                break
            # Rows containing '沒有' ("none") are skipped -- presumably the
            # member has not filled in that section.
            if '沒有' in span:
                continue
            info_list_idx.append(idx)
        if not info_list_idx:
            continue
        for idx in info_list_idx:  # look up the scraper for each position
            try:
                driver, user_profile = profile_map[idx](driver)
                user_info.update(user_profile)
            except Exception as exc:
                # Best effort: one failing section must not lose the others.
                # ``Exception`` (not a bare ``except``) so that Ctrl-C still
                # stops a long crawl.
                print('skip section {}: {!r}'.format(idx, exc))
                continue
        # Basic info is expected to be present on every profile, so it is
        # fetched unconditionally (and a failure here propagates).
        driver, user_profile = nav_basic(driver)
        user_info.update(user_profile)

        user.update(user_info)
        yield user
        # Fixed pause between profiles.
        time.sleep(5)


def nav_edu(driver):
    """Scrape the education entries of the currently loaded profile.

    Clicks the link in the second item of the ``._4bl9 .uiList`` list, waits
    2 seconds, then reads every ``li`` under ``._4qm1:nth-child(2)``.

    Args:
        driver: Selenium WebDriver positioned on a profile page.

    Returns:
        A ``(driver, {'Education': [...]})`` tuple; each list entry is the
        text of the ``_6a`` element inside one ``li``.
    """
    section_link = driver.find_element_by_css_selector('._4bl9 .uiList li:nth-child(2) a')
    section_link.click()
    time.sleep(2)

    def entry_text(row):
        # Strip the markup from the row's ``_6a`` element, keeping its text.
        markup = row.find_element_by_class_name('_6a').get_attribute('innerHTML')
        return BeautifulSoup(markup, 'lxml').get_text()

    rows = driver.find_elements_by_css_selector('._4qm1:nth-child(2) li')
    return driver, {'Education': [entry_text(row) for row in rows]}


def nav_work(driver):
    """Scrape the work entries of the currently loaded profile.

    Clicks the link in the second item of the ``._4bl9 .uiList`` list, waits
    2 seconds, then reads every ``li`` under ``._4qm1:nth-child(1)``.

    Args:
        driver: Selenium WebDriver positioned on a profile page.

    Returns:
        A ``(driver, {'Work': [...]})`` tuple; each list entry is the text of
        the ``_6a`` element inside one ``li``.
    """
    driver.find_element_by_css_selector('._4bl9 .uiList li:nth-child(2) a').click()
    time.sleep(2)
    jobs = []
    for row in driver.find_elements_by_css_selector('._4qm1:nth-child(1) li'):
        markup = row.find_element_by_class_name('_6a').get_attribute('innerHTML')
        soup = BeautifulSoup(markup, 'lxml')
        jobs.append(soup.get_text())
    result = {'Work': jobs}
    return driver, result


def nav_places(driver):
    """Scrape the current city and hometown of the currently loaded profile.

    Clicks the link in the third item of the ``._4bl9 .uiList`` list, waits
    2 seconds, then reads every ``li`` under
    ``.uiList.fbProfileEditExperiences``.

    Args:
        driver: Selenium WebDriver positioned on a profile page.

    Returns:
        A ``(driver, {'Places': {...}})`` tuple whose inner dict may hold the
        keys ``Current`` and ``Hometown``, each with its label text removed.
    """
    driver.find_element_by_css_selector('._4bl9 .uiList li:nth-child(3) a').click()
    time.sleep(2)
    found = {}
    for row in driver.find_elements_by_css_selector('.uiList.fbProfileEditExperiences li'):
        text = BeautifulSoup(row.get_attribute('innerHTML'), 'lxml').get_text(strip=True)
        is_current = '現居城市' in text
        # NOTE(review): every row that is not the current city is stored as
        # the hometown, so a later row overwrites an earlier one -- confirm
        # the page never lists other kinds of places here.
        key = 'Current' if is_current else 'Hometown'
        label = '現居城市' if is_current else '家鄉'
        found[key] = text.replace(label, '')
    return driver, {'Places': found}


def nav_basic(driver):
    """Scrape gender and "interested in" from the currently loaded profile.

    Clicks the link in the fourth item of the ``._4bl9 .uiList`` list, waits
    2 seconds, then inspects every ``li._3pw9`` under ``#pagelet_basic``. In
    each row the first ``span`` is taken as the label and the second as the
    value.

    Args:
        driver: Selenium WebDriver positioned on a profile page.

    Returns:
        A ``(driver, {'Basic': {...}})`` tuple whose inner dict may hold the
        keys ``Sexuality`` and ``Sex`` (inner HTML of the value span).
    """
    driver.find_element_by_css_selector('._4bl9 .uiList li:nth-child(4) a').click()
    time.sleep(2)
    basic_list = {}
    for li in driver.find_elements_by_css_selector('#pagelet_basic li._3pw9'):
        spans = li.find_elements_by_tag_name('span')
        # A row without both a label and a value span cannot be parsed; skip
        # it rather than raise IndexError and abort the caller's crawl.
        if len(spans) < 2:
            continue
        label = spans[0].get_attribute('innerHTML')
        if label == '戀愛性向':  # "interested in"
            basic_list['Sexuality'] = spans[1].get_attribute('innerHTML')
        elif label == '性別':  # "gender"
            basic_list['Sex'] = spans[1].get_attribute('innerHTML')
    return driver, {'Basic': basic_list}


def nav_relationships(driver):
    """Scrape the relationship status of the currently loaded profile.

    Clicks the link in the fifth item of the ``._4bl9 .uiList`` list, waits
    2 seconds, then reads the first ``li`` under ``._4qm1:nth-child(1)``.

    Args:
        driver: Selenium WebDriver positioned on a profile page.

    Returns:
        A ``(driver, {'MaritalStatus': text})`` tuple; the text is stored as
        scraped, without any normalisation.
    """
    section_link = driver.find_element_by_css_selector('._4bl9 .uiList li:nth-child(5) a')
    section_link.click()
    time.sleep(2)
    first_row = driver.find_element_by_css_selector('._4qm1:nth-child(1) li')
    markup = first_row.get_attribute('innerHTML')
    status_text = BeautifulSoup(markup, 'lxml').get_text()
    return driver, {'MaritalStatus': status_text}


# Overview-row position (1-based) -> scraper for the matching profile section.
# nav_basic is deliberately absent: get_profile always calls it directly.
profile_map = dict(enumerate(
    (nav_edu, nav_work, nav_places, nav_relationships),
    start=1,
))

if __name__ == '__main__':
    # Script entry point: log in, list the group's members, then scrape and
    # store every member profile that is not already in the collection.
    import os

    # NOTE(review): credentials should not live in source control. They can
    # now be supplied via FB_USERNAME / FB_PASSWORD; the literals remain only
    # as fallbacks so the script keeps working unchanged -- remove them once
    # the environment variables are in place.
    username = os.environ.get('FB_USERNAME', 'holq8s4w2Iijl@zlivemail.com')
    password = os.environ.get('FB_PASSWORD', 'M4423Q7Xgi')

    # Must stay a module-level name: load_more() reads the global ``driver``.
    driver = driver_new_session()
    try:
        facebook_login(driver, username, password)

        user_list = get_group_number(driver)
        print("获取用户列表完毕")

        for user_profile_all in get_profile(driver, user_list):
            model_fb_user.insert_one(user_profile_all)
    finally:
        # Close the browser window even when the crawl fails part-way.
        driver.close()
